import numpy as np
import pandas as pd
import scanpy as sc
import scipy
import os
import scipy.io as sio
# Scanpy logging level: 0 = errors, 1 = warnings, 2 = info, 3 = hints.
sc.settings.verbosity = 1
# Default figure styling used by every plot below (PNG output, higher dpi on save).
sc.settings.set_figure_params(
    figsize=(4, 4),
    fontsize=10,
    dpi=100,
    dpi_save=300,
    format='png',
)
data_path = './9301-CT_Output/all-well/DGE_filtered/'

# The DGE_filtered folder holds the expression matrix (DGE.mtx) together with
# the gene table (all_genes.csv) and the per-cell metadata (cell_metadata.csv).
adata4 = sc.read_mtx(os.path.join(data_path, 'DGE.mtx'))
gene_data = pd.read_csv(os.path.join(data_path, 'all_genes.csv'))
cell_meta = pd.read_csv(os.path.join(data_path, 'cell_metadata.csv'))

# Drop genes that have no name and keep the matrix columns aligned with the
# remaining rows of the gene table. The explicit copy avoids assigning .var
# on a view (which would otherwise be copied implicitly).
gene_data = gene_data[gene_data.gene_name.notnull()]
notNa = gene_data.index.to_list()
adata4 = adata4[:, notNa].copy()

# Index the gene table by gene name *before* attaching it, so AnnData receives
# a string index straight away instead of the leftover integer index (which
# triggers "AnnData expects .var.index to contain strings").
gene_data = gene_data.set_index('gene_name')
gene_data.index.name = None
adata4.var = gene_data
adata4.var_names_make_unique()

# Attach the cell metadata, indexed by the 'bc_wells' column.
cell_meta = cell_meta.set_index('bc_wells')
cell_meta.index.name = None
adata4.obs = cell_meta
adata4.obs_names_make_unique()

# Basic filtering: cells need at least 300 detected genes, genes must be seen
# in at least 5 cells.
sc.pp.filter_cells(adata4, min_genes=300)
sc.pp.filter_genes(adata4, min_cells=5)

# Returns the dimensions of the expression matrix (cells, genes)
adata4.shape
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/anndata/_core/anndata.py:798: UserWarning:
AnnData expects .var.index to contain strings, but got values like:
[0, 1, 2, 3, 4]
Inferred to be: integer
value_idx = self._prep_dim_index(value.index, attr)
(17201, 29390)
data_path = './10268-CT/Jason_252_12_WTm/DGE_filtered/'

# The DGE_filtered folder holds the expression matrix (DGE.mtx) together with
# the gene table (all_genes.csv) and the per-cell metadata (cell_metadata.csv).
adata2 = sc.read_mtx(os.path.join(data_path, 'DGE.mtx'))
gene_data = pd.read_csv(os.path.join(data_path, 'all_genes.csv'))
cell_meta = pd.read_csv(os.path.join(data_path, 'cell_metadata.csv'))

# Drop genes that have no name. The matrix is subset with the same indices so
# its columns stay aligned with the gene table; this is a no-op when every
# gene is named, and prevents a length mismatch on the .var assignment when
# some are not.
gene_data = gene_data[gene_data.gene_name.notnull()]
notNa = gene_data.index.to_list()
adata2 = adata2[:, notNa].copy()

# Index the gene table by gene name *before* attaching it, so AnnData receives
# a string index straight away instead of the leftover integer index (which
# triggers "AnnData expects .var.index to contain strings").
gene_data = gene_data.set_index('gene_name')
gene_data.index.name = None
adata2.var = gene_data
adata2.var_names_make_unique()

# Attach the cell metadata, indexed by the 'bc_wells' column.
cell_meta = cell_meta.set_index('bc_wells')
cell_meta.index.name = None
adata2.obs = cell_meta
adata2.obs_names_make_unique()

# Basic filtering: cells need at least 300 detected genes.
# NOTE(review): min_cells=1 here, whereas the other two datasets in this file
# use min_cells=5 -- confirm this difference is intentional.
sc.pp.filter_cells(adata2, min_genes=300)
sc.pp.filter_genes(adata2, min_cells=1)

# Returns the dimensions of the expression matrix (cells, genes)
adata2.shape
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/anndata/_core/anndata.py:798: UserWarning:
AnnData expects .var.index to contain strings, but got values like:
[0, 1, 2, 3, 4]
Inferred to be: integer
value_idx = self._prep_dim_index(value.index, attr)
(5420, 26265)
data_path = './10268-CT/Jason_252_34_YT2/DGE_filtered/'

# The DGE_filtered folder holds the expression matrix (DGE.mtx) together with
# the gene table (all_genes.csv) and the per-cell metadata (cell_metadata.csv).
adata3 = sc.read_mtx(os.path.join(data_path, 'DGE.mtx'))
gene_data = pd.read_csv(os.path.join(data_path, 'all_genes.csv'))
cell_meta = pd.read_csv(os.path.join(data_path, 'cell_metadata.csv'))

# Drop genes that have no name. The matrix is subset with the same indices so
# its columns stay aligned with the gene table; this is a no-op when every
# gene is named, and prevents a length mismatch on the .var assignment when
# some are not.
gene_data = gene_data[gene_data.gene_name.notnull()]
notNa = gene_data.index.to_list()
adata3 = adata3[:, notNa].copy()

# Index the gene table by gene name *before* attaching it, so AnnData receives
# a string index straight away instead of the leftover integer index (which
# triggers "AnnData expects .var.index to contain strings").
gene_data = gene_data.set_index('gene_name')
gene_data.index.name = None
adata3.var = gene_data
adata3.var_names_make_unique()

# Attach the cell metadata, indexed by the 'bc_wells' column.
cell_meta = cell_meta.set_index('bc_wells')
cell_meta.index.name = None
adata3.obs = cell_meta
adata3.obs_names_make_unique()

# Basic filtering: cells need at least 300 detected genes, genes must be seen
# in at least 5 cells.
sc.pp.filter_cells(adata3, min_genes=300)
sc.pp.filter_genes(adata3, min_cells=5)

# Returns the dimensions of the expression matrix (cells, genes)
adata3.shape
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/anndata/_core/anndata.py:798: UserWarning:
AnnData expects .var.index to contain strings, but got values like:
[0, 1, 2, 3, 4]
Inferred to be: integer
value_idx = self._prep_dim_index(value.index, attr)
(4534, 19364)
# Show the summary repr of adata3 (dimensions plus obs/var column names).
adata3
AnnData object with n_obs × n_vars = 4534 × 19364
obs: 'sample', 'species', 'gene_count', 'tscp_count', 'mread_count', 'bc1_well', 'bc2_well', 'bc3_well', 'bc1_wind', 'bc2_wind', 'bc3_wind', 'n_genes'
var: 'gene_id', 'genome', 'n_cells'
# Score doublets with Scrublet on each dataset separately, before any merging.
# Each call annotates its AnnData in place (the 'doublet_score' and
# 'predicted_doublet' obs columns used further down).
for per_dataset in (adata2, adata3, adata4):
    sc.external.pp.scrublet(per_dataset)
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/preprocessing/_normalization.py:170: UserWarning: Received a view of an AnnData. Making a copy. view_to_actual(adata)
Automatically set threshold at doublet score = 0.55 Detected doublet rate = 0.1% Estimated detectable doublet fraction = 0.3% Overall doublet rate: Expected = 5.0% Estimated = 17.6%
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/preprocessing/_normalization.py:170: UserWarning: Received a view of an AnnData. Making a copy. view_to_actual(adata)
Automatically set threshold at doublet score = 0.44 Detected doublet rate = 0.0% Estimated detectable doublet fraction = 0.5% Overall doublet rate: Expected = 5.0% Estimated = 8.2%
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/preprocessing/_normalization.py:170: UserWarning: Received a view of an AnnData. Making a copy. view_to_actual(adata)
Automatically set threshold at doublet score = 0.68 Detected doublet rate = 0.0% Estimated detectable doublet fraction = 0.3% Overall doublet rate: Expected = 5.0% Estimated = 2.1%
# Keep only cells whose Scrublet doublet score is below 0.1 (a fixed cutoff,
# not the automatically chosen Scrublet threshold).
adata2 = adata2[adata2.obs['doublet_score'] < 0.1, :]
adata3 = adata3[adata3.obs['doublet_score'] < 0.1, :]
adata4 = adata4[adata4.obs['doublet_score'] < 0.1, :]

# Merge the two 10268-CT datasets on the union of their genes; the batch label
# is written to obs['sample'] as '252 1/2' (adata2) or '252 3/4' (adata3).
adata5 = adata2.concatenate(
    adata3,
    join='outer',
    batch_key='sample',
    batch_categories=['252 1/2', '252 3/4'],
)
adata5
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/anndata/_core/anndata.py:1785: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour. [AnnData(sparse.csr_matrix(a.shape), obs=a.obs) for a in all_adatas],
AnnData object with n_obs × n_vars = 7876 × 26304
obs: 'sample', 'species', 'gene_count', 'tscp_count', 'mread_count', 'bc1_well', 'bc2_well', 'bc3_well', 'bc1_wind', 'bc2_wind', 'bc3_wind', 'n_genes', 'doublet_score', 'predicted_doublet'
var: 'gene_id-252 1/2', 'genome-252 1/2', 'n_cells-252 1/2', 'gene_id-252 3/4', 'genome-252 3/4', 'n_cells-252 3/4'
# Combine the 9301-CT dataset (batch '1') with the merged 10268-CT datasets
# (batch '2'), again keeping the union of genes.
adata = adata4.concatenate(
    adata5,
    join='outer',
    batch_key='batch',
    batch_categories=['1', '2'],
)
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/anndata/_core/anndata.py:1785: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour. [AnnData(sparse.csr_matrix(a.shape), obs=a.obs) for a in all_adatas],
# Flag mitochondrial genes by their 'mt-' name prefix and compute per-cell QC
# metrics (n_genes_by_counts, total_counts, pct_counts_mt, ...).
adata.var['mt'] = adata.var_names.str.startswith('mt-')
sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)

# Apply all three QC thresholds with a single boolean mask and materialise the
# result. Chaining three separate subsets would leave `adata` as a view of a
# view, which the next in-place step has to copy implicitly (with a warning).
passes_qc = (
    (adata.obs['n_genes_by_counts'] < 5000)
    & (adata.obs['total_counts'] < 20000)
    & (adata.obs['pct_counts_mt'] < 15)
)
adata = adata[passes_qc.to_numpy(), :].copy()
adata.shape
(23218, 30759)
# Normalize every cell to a total of 1e4 counts, then apply log1p.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/preprocessing/_normalization.py:170: UserWarning: Received a view of an AnnData. Making a copy. view_to_actual(adata)
# Flag highly variable genes (adata.var['highly_variable']); this only
# annotates genes, none are removed from the matrix here.
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.25)
sc.pl.highly_variable_genes(adata, save='') # scanpy generates the filename automatically
# Keep a snapshot of the current expression values in adata.raw before the
# regression and scaling steps that follow. At this point the values are
# already normalized and log1p-transformed, i.e. they are not raw counts.
adata.raw = adata
WARNING: saving figure to file figures/filter_genes_dispersion.png
# Regress out the 'tscp_count' and 'pct_counts_mt' obs columns, then scale the
# expression values with a cap of 10.
sc.pp.regress_out(adata, ['tscp_count', 'pct_counts_mt'], n_jobs=12)
sc.pp.scale(adata, max_value=10)
# PCA followed by Harmony integration across the 'batch' obs column.
sc.tl.pca(adata, svd_solver='arpack')
sc.external.pp.harmony_integrate(adata, 'batch', max_iter_harmony=50)
# Neighbour graph on the Harmony-corrected embedding (first 35 components),
# then UMAP and Leiden clustering; random_state is fixed for reproducibility.
sc.pp.neighbors(adata, use_rep='X_pca_harmony', n_neighbors=10, n_pcs=35, random_state=0)
sc.tl.umap(adata, random_state=0)
sc.tl.leiden(adata, resolution=0.6, random_state=0)
sc.pl.umap(adata, color=['leiden'], legend_fontsize=8)
2023-08-20 09:53:55,942 - harmonypy - INFO - Iteration 1 of 50 2023-08-20 09:53:58,945 - harmonypy - INFO - Iteration 2 of 50 2023-08-20 09:54:02,384 - harmonypy - INFO - Iteration 3 of 50 2023-08-20 09:54:05,328 - harmonypy - INFO - Iteration 4 of 50 2023-08-20 09:54:08,141 - harmonypy - INFO - Iteration 5 of 50 2023-08-20 09:54:11,323 - harmonypy - INFO - Iteration 6 of 50 2023-08-20 09:54:14,642 - harmonypy - INFO - Iteration 7 of 50 2023-08-20 09:54:17,673 - harmonypy - INFO - Iteration 8 of 50 2023-08-20 09:54:20,393 - harmonypy - INFO - Iteration 9 of 50 2023-08-20 09:54:23,190 - harmonypy - INFO - Converged after 9 iterations /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html from .autonotebook import tqdm as notebook_tqdm
# Show the summary repr of the merged object after embedding and clustering.
adata
AnnData object with n_obs × n_vars = 23218 × 30759
obs: 'sample', 'species', 'gene_count', 'tscp_count', 'tscp_count_50dup', 'read_count', 'bc1_well', 'bc2_well', 'bc3_well', 'bc1_wind', 'bc2_wind', 'bc3_wind', 'n_genes', 'doublet_score', 'predicted_doublet', 'mread_count', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden'
var: 'gene_id-1', 'genome-1', 'n_cells-1', 'gene_id-252 1/2-2', 'genome-252 1/2-2', 'n_cells-252 1/2-2', 'gene_id-252 3/4-2', 'genome-252 3/4-2', 'n_cells-252 3/4-2', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_colors'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
sc.set_figure_params(dpi=100, figsize=(4,4))

# One UMAP per annotation, untitled.
for obs_key in ('leiden', 'sample'):
    sc.pl.umap(adata, color=[obs_key], title='')

# One UMAP per batch, highlighting only the cells of that batch.
for batch_label in ('1', '2'):
    sc.pl.umap(adata, color=['batch'], groups=batch_label, title='')
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: In a future version of pandas all arguments of Categorical.replace except for the argument 'value' will be keyword-only. values = values.replace(values.categories.difference(groups), np.nan) /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: Categorical.replace is deprecated and will be removed in a future version. Use Series.replace directly instead. values = values.replace(values.categories.difference(groups), np.nan)
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: In a future version of pandas all arguments of Categorical.replace except for the argument 'value' will be keyword-only. values = values.replace(values.categories.difference(groups), np.nan) /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: Categorical.replace is deprecated and will be removed in a future version. Use Series.replace directly instead. values = values.replace(values.categories.difference(groups), np.nan)
# Cell counts per sample (rows) and Leiden cluster (columns).
pd.crosstab(index=adata.obs['sample'], columns=adata.obs['leiden'])
| leiden | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| sample | |||||||||||||||||||||
| 35-2-34-3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 252 1/2 | 717 | 1486 | 20 | 626 | 363 | 85 | 156 | 186 | 10 | 11 | ... | 11 | 5 | 2 | 24 | 58 | 1 | 44 | 98 | 64 | 29 |
| 252 3/4 | 615 | 1173 | 27 | 860 | 215 | 79 | 127 | 62 | 16 | 0 | ... | 6 | 9 | 5 | 21 | 63 | 1 | 25 | 36 | 59 | 9 |
| 252-7 | 3 | 65 | 91 | 170 | 31 | 44 | 0 | 1 | 41 | 11 | ... | 0 | 1 | 0 | 18 | 2 | 4 | 0 | 0 | 0 | 2 |
| 252-8 | 0 | 16 | 17 | 12 | 1 | 0 | 0 | 0 | 6 | 3 | ... | 0 | 0 | 1 | 1 | 1 | 5 | 0 | 0 | 0 | 1 |
| 252-9 | 5 | 272 | 124 | 152 | 141 | 314 | 2 | 0 | 92 | 6 | ... | 3 | 6 | 0 | 13 | 5 | 1 | 0 | 0 | 5 | 6 |
| 263-4 | 0 | 23 | 62 | 6 | 2 | 0 | 0 | 0 | 18 | 63 | ... | 1 | 1 | 1 | 1 | 1 | 8 | 0 | 0 | 0 | 1 |
| 263-6 | 335 | 108 | 121 | 117 | 186 | 119 | 209 | 278 | 97 | 5 | ... | 30 | 52 | 13 | 30 | 23 | 16 | 17 | 3 | 9 | 14 |
| 263-7 | 266 | 106 | 338 | 15 | 152 | 71 | 168 | 127 | 116 | 1 | ... | 60 | 189 | 52 | 19 | 18 | 45 | 31 | 11 | 4 | 17 |
| 264-3 | 770 | 82 | 318 | 19 | 215 | 145 | 130 | 101 | 118 | 30 | ... | 118 | 20 | 91 | 26 | 14 | 38 | 26 | 18 | 4 | 14 |
| 273-7 | 632 | 50 | 114 | 8 | 75 | 154 | 32 | 26 | 111 | 79 | ... | 115 | 78 | 25 | 6 | 14 | 11 | 15 | 3 | 3 | 6 |
| 273-8 | 236 | 65 | 11 | 25 | 77 | 103 | 36 | 79 | 28 | 2 | ... | 24 | 49 | 3 | 7 | 6 | 2 | 11 | 2 | 3 | 4 |
| 307-1-261-2 | 0 | 1 | 2 | 4 | 0 | 0 | 2 | 0 | 0 | 56 | ... | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 |
| 307-2-286-1 | 3 | 19 | 180 | 14 | 6 | 0 | 13 | 0 | 12 | 394 | ... | 2 | 0 | 0 | 4 | 1 | 0 | 0 | 0 | 6 | 0 |
| 2704- | 21 | 0 | 335 | 0 | 5 | 1 | 11 | 9 | 23 | 6 | ... | 3 | 1 | 18 | 2 | 3 | 29 | 1 | 1 | 2 | 0 |
| 2705- | 9 | 8 | 401 | 1 | 4 | 5 | 8 | 9 | 56 | 6 | ... | 2 | 1 | 92 | 1 | 3 | 44 | 2 | 0 | 0 | 1 |
| 2706- | 234 | 8 | 299 | 4 | 8 | 5 | 4 | 12 | 27 | 4 | ... | 45 | 3 | 42 | 0 | 2 | 6 | 23 | 0 | 2 | 1 |
| 3083-34-4 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 13 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| C57-1 | 177 | 76 | 28 | 29 | 146 | 46 | 51 | 40 | 42 | 13 | ... | 24 | 4 | 2 | 38 | 6 | 4 | 3 | 3 | 2 | 7 |
| C57-2 | 20 | 138 | 7 | 87 | 70 | 57 | 17 | 12 | 11 | 8 | ... | 1 | 0 | 0 | 15 | 1 | 0 | 0 | 0 | 2 | 3 |
| C57-15 | 2 | 35 | 22 | 13 | 9 | 5 | 0 | 0 | 12 | 9 | ... | 0 | 1 | 2 | 4 | 4 | 1 | 0 | 0 | 0 | 1 |
| C57-25 | 2 | 148 | 32 | 127 | 38 | 19 | 1 | 2 | 48 | 28 | ... | 0 | 1 | 1 | 34 | 3 | 6 | 0 | 1 | 0 | 2 |
22 rows × 22 columns
# Samples kept for the downstream analysis; every other sample is dropped.
samples_to_keep = [
    'C57-1',
    'C57-15',
    'C57-2',
    'C57-25',
    '263-6',
    '263-7',
    '2704-',
    '252-9',
    '263-4',
    '264-3',
    '273-7',
    '273-8',
    '2705-',
    '2706-',
    '252-7',
    '252-8',
    '252 1/2',
    '252 3/4',
]
# Take an explicit copy: the subset is written to afterwards (new obs columns,
# re-computed embeddings), and writing into a view forces an implicit copy.
adata = adata[adata.obs['sample'].isin(samples_to_keep)].copy()
adata
View of AnnData object with n_obs × n_vars = 22444 × 30759
obs: 'sample', 'species', 'gene_count', 'tscp_count', 'tscp_count_50dup', 'read_count', 'bc1_well', 'bc2_well', 'bc3_well', 'bc1_wind', 'bc2_wind', 'bc3_wind', 'n_genes', 'doublet_score', 'predicted_doublet', 'mread_count', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden'
var: 'gene_id-1', 'genome-1', 'n_cells-1', 'gene_id-252 1/2-2', 'genome-252 1/2-2', 'n_cells-252 1/2-2', 'gene_id-252 3/4-2', 'genome-252 3/4-2', 'n_cells-252 3/4-2', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_colors', 'sample_colors', 'batch_colors'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
# Per-sample annotations: for each new obs column, the label to assign and the
# samples that receive it. Columns and labels are applied in the order listed.
sample_annotations = {
    "genotype": {
        "C57B6": ['C57-1', 'C57-15', 'C57-2', 'C57-25'],
        "WT": ['263-6', '263-7', '2704-', '252-9', '263-4', '252 1/2'],
        "YT-deleted": [
            '264-3', '273-7', '273-8', '2705-', '2706-',
            '252-7', '252-8', '252 3/4',
        ],
    },
    "treatment": {
        "Saline": [
            'C57-2', 'C57-25', '252-9', '263-4',
            '252-7', '252-8', '252 1/2', '252 3/4',
        ],
        "Bleomycin": [
            'C57-1', 'C57-15', '263-6', '263-7', '2704-',
            '264-3', '273-7', '273-8', '2705-', '2706-',
        ],
    },
    "group": {
        "Control Veh": ['C57-2', 'C57-25', '252-9', '263-4', '252 1/2'],
        "YT-deleted Veh": ['252-7', '252-8', '252 3/4'],
        "Control bleo": ['C57-1', 'C57-15', '263-6', '263-7', '2704-'],
        "YT-deleted bleo": ['264-3', '273-7', '273-8', '2705-', '2706-'],
    },
}

for obs_column, label_to_samples in sample_annotations.items():
    for label, members in label_to_samples.items():
        adata.obs.loc[adata.obs["sample"].isin(members), obs_column] = label
# Re-embed and re-cluster the sample subset: PCA, Harmony integration across
# 'batch', neighbour graph on the Harmony embedding, UMAP and Leiden, using
# the same parameters as the first pass on the full object.
sc.tl.pca(adata, svd_solver='arpack')
sc.external.pp.harmony_integrate(adata, 'batch', max_iter_harmony=50)
sc.pp.neighbors(adata, use_rep='X_pca_harmony', n_neighbors=10, n_pcs=35, random_state=0)
sc.tl.umap(adata, random_state=0)
sc.tl.leiden(adata, resolution=0.6, random_state=0)
sc.pl.umap(adata, color=['leiden'], legend_fontsize=8)
2023-08-20 09:58:36,291 - harmonypy - INFO - Iteration 1 of 50 2023-08-20 09:58:39,170 - harmonypy - INFO - Iteration 2 of 50 2023-08-20 09:58:42,041 - harmonypy - INFO - Iteration 3 of 50 2023-08-20 09:58:46,017 - harmonypy - INFO - Iteration 4 of 50 2023-08-20 09:58:48,696 - harmonypy - INFO - Iteration 5 of 50 2023-08-20 09:58:51,456 - harmonypy - INFO - Iteration 6 of 50 2023-08-20 09:58:54,680 - harmonypy - INFO - Iteration 7 of 50 2023-08-20 09:58:57,618 - harmonypy - INFO - Iteration 8 of 50 2023-08-20 09:59:00,473 - harmonypy - INFO - Iteration 9 of 50 2023-08-20 09:59:03,431 - harmonypy - INFO - Converged after 9 iterations
# Show the summary repr of the annotated, re-clustered subset.
adata
AnnData object with n_obs × n_vars = 22444 × 30759
obs: 'sample', 'species', 'gene_count', 'tscp_count', 'tscp_count_50dup', 'read_count', 'bc1_well', 'bc2_well', 'bc3_well', 'bc1_wind', 'bc2_wind', 'bc3_wind', 'n_genes', 'doublet_score', 'predicted_doublet', 'mread_count', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden', 'genotype', 'treatment', 'group'
var: 'gene_id-1', 'genome-1', 'n_cells-1', 'gene_id-252 1/2-2', 'genome-252 1/2-2', 'n_cells-252 1/2-2', 'gene_id-252 3/4-2', 'genome-252 3/4-2', 'n_cells-252 3/4-2', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_colors', 'sample_colors', 'batch_colors'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
sc.set_figure_params(dpi=100, figsize=(4,4))

# Overview UMAPs coloured by cluster and by experimental group, untitled.
for obs_key in ('leiden', 'group'):
    sc.pl.umap(adata, color=[obs_key], title='')

# One UMAP per experimental group, highlighting only that group's cells.
# Each pair is (value in obs['group'], title shown on the panel).
group_panels = [
    ('Control Veh', 'Control PBS'),
    ('YT-deleted Veh', 'YT-deleted PBS'),
    ('Control bleo', 'Control Bleo'),
    ('YT-deleted bleo', 'YT-deleted Bleo'),
]
for group_label, panel_title in group_panels:
    sc.pl.umap(adata, color=['group'], groups=group_label, title=panel_title)
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: In a future version of pandas all arguments of Categorical.replace except for the argument 'value' will be keyword-only. values = values.replace(values.categories.difference(groups), np.nan) /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: Categorical.replace is deprecated and will be removed in a future version. Use Series.replace directly instead. values = values.replace(values.categories.difference(groups), np.nan)
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: In a future version of pandas all arguments of Categorical.replace except for the argument 'value' will be keyword-only. values = values.replace(values.categories.difference(groups), np.nan) /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: Categorical.replace is deprecated and will be removed in a future version. Use Series.replace directly instead. values = values.replace(values.categories.difference(groups), np.nan)
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: In a future version of pandas all arguments of Categorical.replace except for the argument 'value' will be keyword-only. values = values.replace(values.categories.difference(groups), np.nan) /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: Categorical.replace is deprecated and will be removed in a future version. Use Series.replace directly instead. values = values.replace(values.categories.difference(groups), np.nan)
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: In a future version of pandas all arguments of Categorical.replace except for the argument 'value' will be keyword-only. values = values.replace(values.categories.difference(groups), np.nan) /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: Categorical.replace is deprecated and will be removed in a future version. Use Series.replace directly instead. values = values.replace(values.categories.difference(groups), np.nan)
# Marker genes shown in the dot/matrix plots, in plotting order.
marker_genes = [
    'Ptprc', 'Col1a1', 'Pecam1', 'Epcam', 'Nkx2-1', 'Sox2', 'Sox9', 'Trp63',
    'Foxj1', 'Scgb1a1', 'Scgb3a1', 'Scgb3a2', 'Muc5b', 'Abca3', 'Sftpc',
    'Sftpd', 'Lamp3', 'Krt8', 'Cdkn1a', 'Gdf15', 'Cldn4', 'Lgals3', 'Sox4',
    'Hopx', 'Ager', 'Rtkn2', 'Vegfa', 'Wnt3a', 'Pdgfa', 'Col4a3', 'Col4a4',
    'Pou2f3', 'Pdgfra', 'Wnt2', 'Tcf21', 'Scube2', 'Wnt5a', 'Pi16', 'Sfrp4',
    'Col1a2', 'Col3a1', 'Col6a1', 'Cthrc1', 'Fn1', 'Aspn', 'Wif1', 'Acta2',
    'Cspg4', 'Pdgfrb', 'Lgr5', 'Lgr6', 'Tgfbi', 'Wt1', 'Plvap', 'Hey1',
    'Car4', 'Itgam', 'Itgax', 'Cd68', 'Cd14', 'Cd86', 'Pparg', 'Spp1',
    'Cpa3', 'Prf1', 'Cd3e', 'Cd4', 'Cd8a', 'Il7r', 'Foxp3', 'Ms4a1',
    'Jchain', 'Irf7', 'Cd34', 'Ly6a', 'Plp1', 'Cdh4', 'Csmd1', 'Mki67',
]

# Marker expression per Leiden cluster, ordered by the cluster dendrogram.
sc.tl.dendrogram(adata, groupby='leiden')
for draw_plot in (sc.pl.dotplot, sc.pl.matrixplot):
    draw_plot(adata, marker_genes, groupby='leiden', standard_scale='var', cmap='YlGnBu', dendrogram=True)
sc.pl.umap(adata, color=['leiden'], title='', legend_loc='on data')
#sc.tl.rank_genes_groups(adata, 'leiden', method='wilcoxon')
#sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False)

# Sub-cluster clusters 6, 7 and 3 one after another. Each step starts from the
# labels produced by the previous one, so the final 'subcluster2' column holds
# all three refinements.
subclustering_steps = [
    ('leiden', '6', 'subcluster'),
    ('subcluster', '7', 'subcluster1'),
    ('subcluster1', '3', 'subcluster2'),
]
for source_key, cluster_id, target_key in subclustering_steps:
    sc.tl.leiden(adata, restrict_to=(source_key, [cluster_id]), resolution=0.2, key_added=target_key)

# Repeat the marker overview and run differential expression on the refined labels.
sc.pl.umap(adata, color=['subcluster2'], title='', legend_loc='on data')
sc.tl.dendrogram(adata, 'subcluster2')
sc.pl.matrixplot(adata, marker_genes, groupby='subcluster2', standard_scale='var', cmap='YlGnBu', dendrogram=True)
sc.tl.rank_genes_groups(adata, 'subcluster2', method='wilcoxon')
sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False)
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:394: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'names'] = self.var_names[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'scores'] = scores[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:399: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'pvals'] = pvals[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:409: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:420: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. 
Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'logfoldchanges'] = np.log2( /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:394: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'names'] = self.var_names[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'scores'] = scores[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:399: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'pvals'] = pvals[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:409: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. 
To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:420: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'logfoldchanges'] = np.log2( /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:394: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'names'] = self.var_names[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'scores'] = scores[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:399: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. 
To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'pvals'] = pvals[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:409: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:420: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'logfoldchanges'] = np.log2( /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:394: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'names'] = self.var_names[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. 
To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'scores'] = scores[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:399: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'pvals'] = pvals[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:409: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:420: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'logfoldchanges'] = np.log2( /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:394: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. 
To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'names'] = self.var_names[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'scores'] = scores[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:399: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'pvals'] = pvals[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:409: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:420: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. 
To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'logfoldchanges'] = np.log2( /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:394: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'names'] = self.var_names[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'scores'] = scores[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:399: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'pvals'] = pvals[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:409: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. 
To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:420: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'logfoldchanges'] = np.log2( /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:394: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'names'] = self.var_names[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'scores'] = scores[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:399: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. 
To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'pvals'] = pvals[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:409: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:420: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'logfoldchanges'] = np.log2( /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:394: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'names'] = self.var_names[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. 
To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'scores'] = scores[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:399: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'pvals'] = pvals[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:409: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:420: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'logfoldchanges'] = np.log2( /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:394: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. 
To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'names'] = self.var_names[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'scores'] = scores[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:399: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'pvals'] = pvals[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:409: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:420: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. 
To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'logfoldchanges'] = np.log2( /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:394: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'names'] = self.var_names[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'scores'] = scores[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:399: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'pvals'] = pvals[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:409: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. 
To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:420: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'logfoldchanges'] = np.log2( /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:394: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'names'] = self.var_names[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'scores'] = scores[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:399: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. 
To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'pvals'] = pvals[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:409: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices] /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:420: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'logfoldchanges'] = np.log2(
# Remove low-quality/doublet clusters: per the original note these are Leiden
# clusters 19 and 22, so only clusters 0-18, 20 and 21 are kept.
keep_leiden = [str(i) for i in range(22) if i != 19]
# .copy() materialises the filtered subset. sc.tl.leiden below writes a new
# obs column, which on a view would force anndata to convert it implicitly.
adata = adata[adata.obs['leiden'].isin(keep_leiden)].copy()
# Re-cluster only the '6,1' group of 'subcluster2'; labels go to 'subcluster3'.
sc.tl.leiden(
    adata,
    restrict_to=('subcluster2', ['6,1']),
    resolution=0.2,
    key_added='subcluster3',
)
# Dendrogram over the new labels, used by the matrix plot that follows.
sc.tl.dendrogram(adata, 'subcluster3')
sc.pl.matrixplot(
    adata,
    marker_genes,
    groupby='subcluster3',
    standard_scale='var',
    cmap='YlGnBu',
    dendrogram=True,
)
# Remove low-quality/doublet subclusters (original note: '7,3', 19, 22) by
# keeping only the labels listed here.
# NOTE(review): '6,1,1' is not in this keep-list -- the original list repeated
# '6,1,2' in its place -- so '6,1,1' cells are dropped here as well, although
# the note above does not mention them. Confirm this is intended before adding
# it back; the category reordering further down lists exactly the labels that
# survive this filter.
keep_subclusters = [
    '0', '1', '2', '3,0', '3,1', '3,2', '4', '5',
    '6,0', '6,1,0', '6,1,2',
    '7,0', '7,1', '7,2', '7,4', '7,5',
    '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '20', '21',
]
# .copy() gives a standalone object, so the in-place category rename and the
# later plotting calls do not modify a view of the unfiltered data.
adata = adata[adata.obs['subcluster3'].isin(keep_subclusters)].copy()

# Cluster label -> cell-type annotation.
cell_type_names = {
    '0': 'Intermediate FB',
    '1': 'Alveolar FB',
    '2': 'AT2',
    '3,0': 'iMO',
    '3,1': 'MDM',
    '3,2': 'AM',
    '4': 'Adventitial FB',
    '5': 'MyoFB',
    '6,0': 'Intermediate alveolar - stressed',
    '6,1,0': 'AT1 - mature',
    '6,1,2': 'AT1 - immature',
    # Only takes effect if '6,1,1' is still a category (see review note above).
    '6,1,1': 'Intermediate alveolar - cell-cycle arrested',
    '7,0': 'moDC',
    '7,1': 'T cells',
    '7,2': 'Treg',
    '7,4': 'NK',
    '7,5': 'Mast',
    '8': 'Intermediate alveolar',
    '9': 'Mesothelial',
    '10': 'MCC',
    '11': 'FB - proliferating',
    '12': 'Plasma/pDC',
    '13': 'cDC',
    '14': 'Capillary',
    '15': 'Pericyte',
    '16': 'SMC',
    '17': 'Macrophage - proliferating',
    '18': 'Activated FB',
    '20': 'Secretory',
    '21': 'Glial',
}
# Translate the current categories in their existing order. Labels without an
# entry in the mapping are passed through unchanged.
tmp = [cell_type_names.get(item, item) for item in adata.obs['subcluster3'].cat.categories]
adata.rename_categories('subcluster3', tmp)
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/anndata/_core/anndata.py:1160: FutureWarning: The `inplace` parameter in pandas.Categorical.rename_categories is deprecated and will be removed in a future version. Removing unused categories will always return a new Categorical object. self.obs[key].cat.rename_categories(categories, inplace=True)
# Use a 5x5 figure for the annotated overview UMAP.
sc.set_figure_params(figsize=(5, 5))
# Colour cells by their 'subcluster3' annotation; the title is an empty string.
sc.pl.umap(
    adata,
    color=['subcluster3'],
    title='',
)
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/anndata/compat/_overloaded_dict.py:106: ImplicitModificationWarning: Trying to modify attribute `._uns` of view, initializing view as actual. self.data[key] = value
# Back to a 4x4 figure for the saved version of the cell-type UMAP.
sc.set_figure_params(figsize=(4, 4))
sc.pl.umap(
    adata,
    color=['subcluster3'],
    title='',
    legend_fontsize='small',
    save='celltype.png',
)
# Cell-type composition per experimental group. normalize='columns' divides
# each column by its total, so values are fractions within a group.
celltype_labels = adata.obs['subcluster3']
group_labels = adata.obs['group']
pd.crosstab(celltype_labels, group_labels, normalize='columns')
| group | Control Veh | Control bleo | YT-deleted Veh | YT-deleted bleo |
|---|---|---|---|---|
| subcluster3 | ||||
| Intermediate FB | 0.169226 | 0.169294 | 0.205804 | 0.310621 |
| Alveolar FB | 0.274109 | 0.051566 | 0.247642 | 0.025654 |
| AT2 | 0.167230 | 0.033080 | 0.255865 | 0.009314 |
| iMO | 0.019042 | 0.067523 | 0.018380 | 0.078105 |
| MDM | 0.000614 | 0.053707 | 0.001451 | 0.080556 |
| AM | 0.017813 | 0.042226 | 0.012817 | 0.026634 |
| Adventitial FB | 0.077703 | 0.086593 | 0.044256 | 0.053105 |
| MyoFB | 0.073710 | 0.048453 | 0.029988 | 0.068301 |
| Intermediate alveolar - stressed | 0.009214 | 0.060712 | 0.010157 | 0.024183 |
| AT1 - mature | 0.013974 | 0.007200 | 0.017170 | 0.005556 |
| AT1 - immature | 0.001382 | 0.002335 | 0.000000 | 0.001307 |
| moDC | 0.006910 | 0.014205 | 0.005320 | 0.029412 |
| T cells | 0.010749 | 0.025491 | 0.005804 | 0.009150 |
| Treg | 0.005068 | 0.009535 | 0.001935 | 0.014869 |
| NK | 0.000921 | 0.003308 | 0.000000 | 0.000000 |
| Mast | 0.000154 | 0.001751 | 0.000000 | 0.001961 |
| Intermediate alveolar | 0.015356 | 0.086398 | 0.010399 | 0.034804 |
| Mesothelial | 0.026259 | 0.042421 | 0.038694 | 0.025490 |
| MCC | 0.035319 | 0.034442 | 0.020073 | 0.015033 |
| FB - proliferating | 0.002457 | 0.022962 | 0.001451 | 0.050817 |
| Plasma/pDC | 0.002150 | 0.047869 | 0.002418 | 0.024837 |
| cDC | 0.004760 | 0.017513 | 0.005320 | 0.042647 |
| Capillary | 0.017506 | 0.006616 | 0.003386 | 0.019771 |
| Pericyte | 0.013206 | 0.018097 | 0.010157 | 0.006536 |
| SMC | 0.010442 | 0.010508 | 0.015719 | 0.006536 |
| Macrophage - proliferating | 0.002457 | 0.018875 | 0.002418 | 0.016503 |
| Activated FB | 0.004760 | 0.007589 | 0.014994 | 0.011928 |
| Secretory | 0.011057 | 0.003308 | 0.014752 | 0.001961 |
| Glial | 0.006450 | 0.006421 | 0.003628 | 0.004412 |
# Order in which the cell-type categories are stored from here on.
# reorder_categories requires this list to contain exactly the existing
# categories.
celltype_order = [
    'Activated FB',
    'Adventitial FB',
    'Alveolar FB',
    'AM',
    'AT1 - immature',
    'AT1 - mature',
    'AT2',
    'Capillary',
    'cDC',
    'FB - proliferating',
    'Glial',
    'iMO',
    'Intermediate alveolar',
    'Intermediate alveolar - stressed',
    'Intermediate FB',
    'Macrophage - proliferating',
    'Mast',
    'MCC',
    'MDM',
    'Mesothelial',
    'moDC',
    'MyoFB',
    'NK',
    'Pericyte',
    'Plasma/pDC',
    'Secretory',
    'SMC',
    'T cells',
    'Treg',
]
# Assign the reordered column back instead of using the deprecated
# `inplace=True`, which newer pandas releases no longer accept.
adata.obs['subcluster3'] = adata.obs['subcluster3'].cat.reorder_categories(celltype_order)
<ipython-input-62-84d32cded238>:1: FutureWarning: The `inplace` parameter in pandas.Categorical.reorder_categories is deprecated and will be removed in a future version. Reordering categories will always return a new Categorical object. adata.obs['subcluster3'].cat.reorder_categories([
sc.set_figure_params(figsize=(4, 4))
# UMAP coloured by experimental group (displayed only, not saved).
sc.pl.umap(
    adata,
    color=['group'],
    legend_fontsize='small',
    title='',
)
# UMAP coloured by cell type with a smaller legend font; saved under the same
# file name as the earlier cell-type UMAP.
sc.pl.umap(
    adata,
    color=['subcluster3'],
    legend_fontsize='x-small',
    title='',
    save='celltype.png',
)
WARNING: saving figure to file figures/umapcelltype.png
# Persist the annotated object to disk.
results_file = './yap_deleted_20230820.h5ad'
adata.write(results_file)
# NOTE(review): the density below is computed after the write above, so this
# saved snapshot does not include it unless the object is written again later.
density_group_key = 'group'
sc.tl.embedding_density(adata, groupby=density_group_key)
sc.pl.embedding_density(
    adata,
    groupby=density_group_key,
    save='embedding_density.png',
)
WARNING: saving figure to file figures/umap_density_group_embedding_density.png
# One UMAP per experimental group, highlighting only that group's cells.
# Each entry is (value in obs['group'], panel title, output file name).
umap_panels = [
    ('Control Veh', 'Control PBS', 'control_pbs.png'),
    ('YT-deleted Veh', 'YT PBS', 'yt_pbs.png'),
    ('Control bleo', 'Control Bleomycin', 'control_bleo.png'),
    ('YT-deleted bleo', 'YT Bleomycin', 'yt_bleo.png'),
]
for group_name, panel_title, out_file in umap_panels:
    sc.pl.umap(
        adata,
        color=['group'],
        groups=group_name,
        legend_fontsize='small',
        title=panel_title,
        save=out_file,
    )
WARNING: saving figure to file figures/umapcontrol_pbs.png
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: In a future version of pandas all arguments of Categorical.replace except for the argument 'value' will be keyword-only. values = values.replace(values.categories.difference(groups), np.nan) /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: Categorical.replace is deprecated and will be removed in a future version. Use Series.replace directly instead. values = values.replace(values.categories.difference(groups), np.nan)
WARNING: saving figure to file figures/umapyt_pbs.png
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: In a future version of pandas all arguments of Categorical.replace except for the argument 'value' will be keyword-only. values = values.replace(values.categories.difference(groups), np.nan) /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: Categorical.replace is deprecated and will be removed in a future version. Use Series.replace directly instead. values = values.replace(values.categories.difference(groups), np.nan)
WARNING: saving figure to file figures/umapcontrol_bleo.png
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: In a future version of pandas all arguments of Categorical.replace except for the argument 'value' will be keyword-only. values = values.replace(values.categories.difference(groups), np.nan) /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: Categorical.replace is deprecated and will be removed in a future version. Use Series.replace directly instead. values = values.replace(values.categories.difference(groups), np.nan)
WARNING: saving figure to file figures/umapyt_bleo.png
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: In a future version of pandas all arguments of Categorical.replace except for the argument 'value' will be keyword-only. values = values.replace(values.categories.difference(groups), np.nan) /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: Categorical.replace is deprecated and will be removed in a future version. Use Series.replace directly instead. values = values.replace(values.categories.difference(groups), np.nan)
# Cell types that make up the stromal subset.
stromal_types = [
    'Alveolar FB', 'Activated FB', 'Adventitial FB', 'FB - proliferating',
    'Intermediate FB', 'MyoFB', 'Pericyte', 'SMC',
]
# .copy() makes the subset a standalone AnnData. The dendrogram computed on it
# afterwards writes to .uns, which on a view triggers an implicit conversion
# (the ImplicitModificationWarning recorded after the dotplot cell).
stromal = adata[adata.obs['subcluster3'].isin(stromal_types)].copy()
stromal
View of AnnData object with n_obs × n_vars = 11829 × 30759
obs: 'sample', 'species', 'gene_count', 'tscp_count', 'tscp_count_50dup', 'read_count', 'bc1_well', 'bc2_well', 'bc3_well', 'bc1_wind', 'bc2_wind', 'bc3_wind', 'n_genes', 'doublet_score', 'predicted_doublet', 'mread_count', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden', 'genotype', 'treatment', 'group', 'subcluster', 'subcluster1', 'subcluster2', 'subcluster3', 'umap_density_group'
var: 'gene_id-1', 'genome-1', 'n_cells-1', 'gene_id-252 1/2-2', 'genome-252 1/2-2', 'n_cells-252 1/2-2', 'gene_id-252 3/4-2', 'genome-252 3/4-2', 'n_cells-252 3/4-2', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_colors', 'sample_colors', 'batch_colors', 'group_colors', 'dendrogram_leiden', 'subcluster1_colors', 'dendrogram_subcluster1', 'rank_genes_groups', 'subcluster2_colors', 'dendrogram_subcluster2', 'dendrogram_subcluster3', 'subcluster3_colors', 'umap_density_group_params'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
# Fibroblast-related gene panels. Only fb_genes4 is plotted in this cell;
# the other lists stay defined for use elsewhere in the notebook.
fb_genes = [
    'Pdgfra', 'Tcf21', 'Lgr5', 'Lgr6', 'Acta2', 'Tgfbi', 'Tagln', 'Pi16',
    'Sfrp4', 'Mfap5', 'Lum', 'Col1a1', 'Col1a2', 'Col3a1', 'Fn1', 'Eln',
    'Fgfr4', 'Bmp5', 'Axin2', 'Rspo2', 'Wnt2', 'Wnt5a', 'Wnt7b', 'Fgf2',
    'Fgf7', 'Fgf10', 'Fgf18', 'Tgfb1', 'Tgfb2', 'Tgfb3', 'Ccn2', 'Cthrc1',
    'Fap', 'Loxl2', 'Snai1', 'Snai2', 'Twist1', 'Gdf15', 'Mki67',
]
# An earlier fb_genes2 definition was overwritten by this one on the very next
# line and never used, so only the effective list is kept.
fb_genes2 = [
    'Pdgfra', 'Scube2', 'Tcf21', 'Wnt2', 'Pi16', 'Sfrp4', 'Mfap5', 'Lum',
    'Cthrc1', 'Fap', 'Col1a1', 'Col1a2', 'Col3a1', 'Fn1', 'Wnt5a', 'Aspn',
    'Tgfb1', 'Tgfb2', 'Mki67', 'Lgr6', 'Acta2', 'Cspg4', 'Tgfbi', 'Myl9',
]
# NOTE(review): 'Mki67' appears twice in this list; left as in the original
# because the list may be consumed outside this cell -- confirm and dedupe.
fb_genes3 = [
    'Col1a1', 'Col3a1', 'Pdgfra', 'Scube2', 'Wnt2', 'Cthrc1', 'Fn1', 'Limch1',
    'Mfap5', 'Pi16', 'Aspn', 'Lum', 'Wnt5a', 'Mki67', 'Top2a', 'Mki67',
    'Pdgfrb', 'Cspg4', 'Kcnq5', 'Lgr6', 'Acta2', 'Myl9',
]
fb_genes4 = [
    'Col1a1', 'Fn1', 'Scube2', 'Cthrc1', 'Pdgfra', 'Gpc6', 'Hhip', 'Mki67',
    'Cspg4', 'Acta2',
]
# Recompute the dendrogram for the cell types present in the stromal subset.
sc.tl.dendrogram(stromal, 'subcluster3')
sc.pl.dotplot(
    stromal,
    fb_genes4,
    groupby='subcluster3',
    cmap='YlGnBu',
    save='fb_marker_dotplot.png',
)
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/anndata/compat/_overloaded_dict.py:106: ImplicitModificationWarning: Trying to modify attribute `._uns` of view, initializing view as actual. self.data[key] = value
WARNING: saving figure to file figures/dotplot_fb_marker_dotplot.png
# Subset to the activated, proliferating and intermediate fibroblast types.
# The proliferating population is labelled 'FB - proliferating' in
# 'subcluster3'. The previous value 'Proliferating FB' matched no category,
# so `isin` silently left those cells out. Cell counts in output recorded
# before this correction therefore exclude them.
act_fb = adata[adata.obs['subcluster3'].isin([
    'Activated FB', 'FB - proliferating', 'Intermediate FB'
])]
act_fb
View of AnnData object with n_obs × n_vars = 4929 × 30759
obs: 'sample', 'species', 'gene_count', 'tscp_count', 'tscp_count_50dup', 'read_count', 'bc1_well', 'bc2_well', 'bc3_well', 'bc1_wind', 'bc2_wind', 'bc3_wind', 'n_genes', 'doublet_score', 'predicted_doublet', 'mread_count', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden', 'genotype', 'treatment', 'group', 'subcluster', 'subcluster1', 'subcluster2', 'subcluster3', 'umap_density_group'
var: 'gene_id-1', 'genome-1', 'n_cells-1', 'gene_id-252 1/2-2', 'genome-252 1/2-2', 'n_cells-252 1/2-2', 'gene_id-252 3/4-2', 'genome-252 3/4-2', 'n_cells-252 3/4-2', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_colors', 'sample_colors', 'batch_colors', 'group_colors', 'dendrogram_leiden', 'subcluster1_colors', 'dendrogram_subcluster1', 'rank_genes_groups', 'subcluster2_colors', 'dendrogram_subcluster2', 'dendrogram_subcluster3', 'subcluster3_colors', 'umap_density_group_params'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
# Gene panel shown for the act_fb subset.
act_fb_genes = [
    'Pdgfra', 'Tcf21', 'Scube2', 'Cthrc1', 'Acta2', 'Tgfbi', 'Tagln',
    'Col1a1', 'Col1a2', 'Col3a1', 'Fn1', 'Eln', 'Fgfr4', 'Bmp5', 'Axin2',
    'Rspo2', 'Wnt2', 'Wnt5a', 'Wnt7b',
]
# Matrix plot per experimental group, with standard_scale='var' applied per gene.
sc.pl.matrixplot(
    act_fb,
    act_fb_genes,
    groupby='group',
    cmap='YlGnBu',
    standard_scale='var',
    save='act_fb_genes.png',
)
WARNING: saving figure to file figures/matrixplot_act_fb_genes.png
# Stacked violins of the fb_genes2 panel across groups (genes on rows).
sc.pl.stacked_violin(
    act_fb,
    fb_genes2,
    groupby='group',
    jitter=True,
    swap_axes=True,
)
# Restrict the act_fb subset to the two bleo groups.
bleo_groups = ['Control bleo', 'YT-deleted bleo']
in_bleo_group = act_fb.obs['group'].isin(bleo_groups)
act_fb2 = act_fb[in_bleo_group]
act_fb2
View of AnnData object with n_obs × n_vars = 2883 × 30759
obs: 'sample', 'species', 'gene_count', 'tscp_count', 'tscp_count_50dup', 'read_count', 'bc1_well', 'bc2_well', 'bc3_well', 'bc1_wind', 'bc2_wind', 'bc3_wind', 'n_genes', 'doublet_score', 'predicted_doublet', 'mread_count', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden', 'genotype', 'treatment', 'group', 'subcluster', 'subcluster1', 'subcluster2', 'subcluster3', 'umap_density_group'
var: 'gene_id-1', 'genome-1', 'n_cells-1', 'gene_id-252 1/2-2', 'genome-252 1/2-2', 'n_cells-252 1/2-2', 'gene_id-252 3/4-2', 'genome-252 3/4-2', 'n_cells-252 3/4-2', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_colors', 'sample_colors', 'batch_colors', 'group_colors', 'dendrogram_leiden', 'subcluster1_colors', 'dendrogram_subcluster1', 'rank_genes_groups', 'subcluster2_colors', 'dendrogram_subcluster2', 'dendrogram_subcluster3', 'subcluster3_colors', 'umap_density_group_params'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
# Stacked violins for the bleo-only act_fb2 subset, split by group.
act_fb2_violin_genes = ['Col1a1', 'Col3a1', 'Col5a3', 'Col6a3', 'Loxl2', 'Tns1', 'Fn1']
sc.pl.stacked_violin(
    act_fb2,
    act_fb2_violin_genes,
    groupby='group',
    jitter=True,
    swap_axes=True,
    save='act_fb_genes.png',
)
WARNING: saving figure to file figures/stacked_violin_act_fb_genes.png
# Dot plot over all four groups in act_fb, with standard_scale='var'.
act_fb_dot_genes = ['Col1a1', 'Col3a1', 'Col5a3', 'Col6a3', 'Loxl2', 'Tns1', 'Fn1']
sc.pl.dotplot(
    act_fb,
    act_fb_dot_genes,
    groupby='group',
    standard_scale='var',
    cmap="YlGnBu",
    save='act_fb_genes_all.png',
)
# Bleo-only comparison on a shorter gene list (saved).
sc.pl.dotplot(
    act_fb2,
    ['Col1a1', 'Col3a1', 'Fn1'],
    groupby='group',
    swap_axes=True,
    save='act_fb_genes.png',
    cmap='YlGnBu',
)
# Bleo-only comparison for 'Ccl2' and 'Mki67' (displayed only).
sc.pl.dotplot(
    act_fb2,
    ['Ccl2', 'Mki67'],
    groupby='group',
    swap_axes=True,
    cmap='YlGnBu',
)
# Subset to the cells annotated as 'AT2'.
is_at2 = adata.obs['subcluster3'].isin(['AT2'])
at2 = adata[is_at2]
at2
View of AnnData object with n_obs × n_vars = 2374 × 30759
obs: 'sample', 'species', 'gene_count', 'tscp_count', 'tscp_count_50dup', 'read_count', 'bc1_well', 'bc2_well', 'bc3_well', 'bc1_wind', 'bc2_wind', 'bc3_wind', 'n_genes', 'doublet_score', 'predicted_doublet', 'mread_count', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden', 'genotype', 'treatment', 'group', 'subcluster', 'subcluster1', 'subcluster2', 'subcluster3', 'umap_density_group'
var: 'gene_id-1', 'genome-1', 'n_cells-1', 'gene_id-252 1/2-2', 'genome-252 1/2-2', 'n_cells-252 1/2-2', 'gene_id-252 3/4-2', 'genome-252 3/4-2', 'n_cells-252 3/4-2', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_colors', 'sample_colors', 'batch_colors', 'group_colors', 'dendrogram_leiden', 'subcluster1_colors', 'dendrogram_subcluster1', 'rank_genes_groups', 'subcluster2_colors', 'dendrogram_subcluster2', 'dendrogram_subcluster3', 'subcluster3_colors', 'umap_density_group_params'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
# Lineage marker panel in the AT2 subset, per 'group'. The same panel is
# drawn twice: once as a dot plot and once as a stacked violin plot.
# Each gene is scaled to [0, 1] across groups (standard_scale='var').
sc.pl.dotplot(
    at2,
    var_names=['Nkx2-1', 'Scgb1a1', 'Sftpb', 'Sftpc', 'Sftpd', 'Lamp3', 'Abca3', 'Slc34a2', 'Lrrk2', 'Ager'],
    groupby='group',
    cmap='YlGnBu',
    standard_scale='var',
    save='at2_genes_lin.png',
)
sc.pl.stacked_violin(
    at2,
    var_names=['Nkx2-1', 'Scgb1a1', 'Sftpb', 'Sftpc', 'Sftpd', 'Lamp3', 'Abca3', 'Slc34a2', 'Lrrk2', 'Ager'],
    groupby='group',
    cmap='YlGnBu',
    standard_scale='var',
    swap_axes=True,
    save='at2_genes_lin_violin.png',
)
WARNING: saving figure to file figures/dotplot_at2_genes_lin.png
WARNING: saving figure to file figures/stacked_violin_at2_genes_lin_violin.png
# Signalling / transcription-factor gene panels per 'group', drawn on the
# AT2, intermediate and AT1 subsets. All plots scale each gene to [0, 1].
#
# NOTE(review): `intermed` and `at1` are assigned further down in this
# section, so these cells appear to have been run out of order; when running
# top-to-bottom both names must already exist before this point.

# AT2 subset: 13-gene panel as matrix plot and as stacked violins.
sc.pl.matrixplot(
    at2,
    var_names=['Tgfb1', 'Tgfb2', 'Itgav', 'Runx2', 'Serpine1', 'Gdf15', 'Ccn1', 'Ccn2', 'Wwtr1', 'Yap1', 'Tead4', 'Klf5', 'Nfib'],
    groupby='group',
    cmap='YlGnBu',
    standard_scale='var',
    save='tran_genes_at2.png',
)
sc.pl.stacked_violin(
    at2,
    var_names=['Tgfb1', 'Tgfb2', 'Itgav', 'Runx2', 'Serpine1', 'Gdf15', 'Ccn1', 'Ccn2', 'Wwtr1', 'Yap1', 'Tead4', 'Klf5', 'Nfib'],
    groupby='group',
    cmap='YlGnBu',
    standard_scale='var',
    swap_axes=True,
    save='trans_genes_at2.png',
)

# AT2 subset: panel without Runx2 / Serpine1.
sc.pl.dotplot(
    at2,
    var_names=['Tgfb1', 'Tgfb2', 'Itgav', 'Gdf15', 'Ccn1', 'Ccn2', 'Wwtr1', 'Yap1', 'Tead4', 'Klf5', 'Nfib'],
    groupby='group',
    cmap='YlGnBu',
    standard_scale='var',
    save='at2_genes_at2.png',
)

# Panel including Serpine1 and Cdkn1a, on AT2 and then on the intermediate
# subset.
sc.pl.dotplot(
    at2,
    var_names=['Tgfb1', 'Tgfb2', 'Itgav', 'Serpine1', 'Cdkn1a', 'Gdf15', 'Ccn1', 'Ccn2', 'Wwtr1', 'Yap1', 'Tead4', 'Klf5', 'Nfib'],
    groupby='group',
    cmap='YlGnBu',
    standard_scale='var',
    save='intermed_genes_at2.png',
)
sc.pl.dotplot(
    intermed,
    var_names=['Tgfb1', 'Tgfb2', 'Itgav', 'Serpine1', 'Cdkn1a', 'Gdf15', 'Ccn1', 'Ccn2', 'Wwtr1', 'Yap1', 'Tead4', 'Klf5', 'Nfib'],
    groupby='group',
    cmap='YlGnBu',
    standard_scale='var',
    save='intermed_genes_intermed.png',
)

# AT1 subset: the 11-gene panel as a matrix plot.
sc.pl.matrixplot(
    at1,
    var_names=['Tgfb1', 'Tgfb2', 'Itgav', 'Gdf15', 'Ccn1', 'Ccn2', 'Wwtr1', 'Yap1', 'Tead4', 'Klf5', 'Nfib'],
    groupby='group',
    cmap='YlGnBu',
    standard_scale='var',
    save='at2_genes_at1.png',
)
# Subset `adata` to cells labelled 'AT1 - mature' or 'AT1 - immature' in
# 'subcluster3'. The result is a view of `adata`.
at1 = adata[adata.obs['subcluster3'].isin(['AT1 - mature', 'AT1 - immature'])]
# Bare expression: shows the AnnData summary when run as a notebook cell.
at1
View of AnnData object with n_obs × n_vars = 262 × 30759
obs: 'sample', 'species', 'gene_count', 'tscp_count', 'tscp_count_50dup', 'read_count', 'bc1_well', 'bc2_well', 'bc3_well', 'bc1_wind', 'bc2_wind', 'bc3_wind', 'n_genes', 'doublet_score', 'predicted_doublet', 'mread_count', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden', 'genotype', 'treatment', 'group', 'subcluster', 'subcluster1', 'subcluster2', 'subcluster3', 'umap_density_group'
var: 'gene_id-1', 'genome-1', 'n_cells-1', 'gene_id-252 1/2-2', 'genome-252 1/2-2', 'n_cells-252 1/2-2', 'gene_id-252 3/4-2', 'genome-252 3/4-2', 'n_cells-252 3/4-2', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_colors', 'sample_colors', 'batch_colors', 'group_colors', 'dendrogram_leiden', 'subcluster1_colors', 'dendrogram_subcluster1', 'rank_genes_groups', 'subcluster2_colors', 'dendrogram_subcluster2', 'dendrogram_subcluster3', 'subcluster3_colors', 'umap_density_group_params'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
# Lineage marker panel in the AT1 subset, per 'group', each gene scaled to
# [0, 1] across groups.
#
# The figure is saved under an AT1-specific name. It was previously saved as
# 'at2_genes_lin.png', the same dotplot filename used for the AT2 lineage
# plot, so running this cell silently overwrote the AT2 figure. Any recorded
# output that still shows the old path predates this rename.
sc.pl.dotplot(
    at1,
    var_names=['Nkx2-1', 'Sox2', 'Scgb1a1', 'Scgb3a2', 'Muc5b', 'Sftpd', 'Lamp3', 'Abca3', 'Slc34a2', 'Lrrk2', 'Ager', 'Hopx', 'Rtkn2', 'Wnt3a'],
    groupby='group',
    cmap='YlGnBu',
    standard_scale='var',
    save='at1_genes_lin.png',
)
WARNING: saving figure to file figures/dotplot_at2_genes_lin.png
# Epithelial subset: cells whose 'subcluster3' label is in the list below.
# NOTE(review): the AT1 subset in this file is selected with
# 'AT1 - mature' / 'AT1 - immature'; confirm that a plain 'AT1' label exists
# in 'subcluster3', otherwise that entry matches no cells.
epi = adata[
    adata.obs['subcluster3'].isin(
        [
            'AT1',
            'Intermediate alveolar',
            'Intermediate alveolar - stressed',
            'Intermediate alveolar - cell-cycle arrested',
            'AT2',
            'MCC',
            'Secretory',
        ]
    )
]
epi

# Intermediate subset: the two intermediate alveolar labels plus the
# immature AT1 cells. Like `epi`, this is a view of `adata`.
intermed = adata[
    adata.obs['subcluster3'].isin(
        [
            'Intermediate alveolar',
            'Intermediate alveolar - stressed',
            'AT1 - immature',
        ]
    )
]
# Bare expression: shows the AnnData summary when run as a notebook cell.
intermed
View of AnnData object with n_obs × n_vars = 1391 × 30759
obs: 'sample', 'species', 'gene_count', 'tscp_count', 'tscp_count_50dup', 'read_count', 'bc1_well', 'bc2_well', 'bc3_well', 'bc1_wind', 'bc2_wind', 'bc3_wind', 'n_genes', 'doublet_score', 'predicted_doublet', 'mread_count', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden', 'genotype', 'treatment', 'group', 'subcluster', 'subcluster1', 'subcluster2', 'subcluster3', 'umap_density_group'
var: 'gene_id-1', 'genome-1', 'n_cells-1', 'gene_id-252 1/2-2', 'genome-252 1/2-2', 'n_cells-252 1/2-2', 'gene_id-252 3/4-2', 'genome-252 3/4-2', 'n_cells-252 3/4-2', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_colors', 'sample_colors', 'batch_colors', 'group_colors', 'dendrogram_leiden', 'subcluster1_colors', 'dendrogram_subcluster1', 'rank_genes_groups', 'subcluster2_colors', 'dendrogram_subcluster2', 'dendrogram_subcluster3', 'subcluster3_colors', 'umap_density_group_params'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
# Gene panel in the intermediate subset, per 'group', each gene scaled to
# [0, 1] across groups.
sc.pl.matrixplot(
    intermed,
    var_names=['Tgfb1', 'Tgfb2', 'Itgav', 'Gdf15', 'Ccn1', 'Ccn2', 'Wwtr1', 'Yap1', 'Tead4', 'Klf5', 'Nfib'],
    groupby='group',
    cmap='YlGnBu',
    standard_scale='var',
    save='at2_genes_trans.png',
)

# `intermed` is a view of `adata`. Take an explicit copy so that the ranking
# results can be stored on it without an implicit view-to-copy conversion.
intermed = intermed.copy()

# sc.pp.log1p is deliberately NOT called here. X is already log-transformed
# (scanpy warned "adata.X seems to be already log-transformed"), and applying
# log1p a second time raised "invalid value encountered in log1p", i.e. it
# wrote NaNs into X before the ranking and every later plot of `intermed`.
# Any such warnings recorded after this cell came from the removed call.
sc.tl.rank_genes_groups(intermed, 'group', method='wilcoxon')
sc.pl.rank_genes_groups(intermed, n_genes=25, sharey=False)
WARNING: adata.X seems to be already log-transformed.
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/preprocessing/_simple.py:373: UserWarning: Received a view of an AnnData. Making a copy. view_to_actual(adata) /opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/preprocessing/_simple.py:352: RuntimeWarning: invalid value encountered in log1p np.log1p(X, out=X)
# Combined 31-gene panel on the intermediate subset and then on the AT1
# subset, per 'group'. Shown inline only (no `save`).
sc.pl.dotplot(
    intermed,
    var_names=['Nkx2-1','Sox2', 'Sox9', 'Cebpa', 'Meg3', 'Trp63', 'Krt8', 'Etv5', 'Scgb1a1', 'Scgb3a2', 'Muc5b', 'Abca3', 'Lamp3', 'Sftpb', 'Sftpc', 'Sftpd', 'Ager', 'Hopx', 'Rtkn2', 'Wnt3a', 'Tgfb1', 'Tgfb2', 'Itgav', 'Gdf15', 'Ccn1', 'Ccn2', 'Wwtr1', 'Yap1', 'Tead4', 'Klf5', 'Nfib'],
    groupby='group',
    cmap='YlGnBu',
    standard_scale='var',
)
sc.pl.dotplot(
    at1,
    var_names=['Nkx2-1','Sox2', 'Sox9', 'Cebpa', 'Meg3', 'Trp63', 'Krt8', 'Etv5', 'Scgb1a1', 'Scgb3a2', 'Muc5b', 'Abca3', 'Lamp3', 'Sftpb', 'Sftpc', 'Sftpd', 'Ager', 'Hopx', 'Rtkn2', 'Wnt3a', 'Tgfb1', 'Tgfb2', 'Itgav', 'Gdf15', 'Ccn1', 'Ccn2', 'Wwtr1', 'Yap1', 'Tead4', 'Klf5', 'Nfib'],
    groupby='group',
    cmap='YlGnBu',
    standard_scale='var',
)

# 12-gene panel on the intermediate subset, written to disk.
sc.pl.dotplot(
    intermed,
    var_names=['Tgfb1', 'Tgfb2', 'Itgav', 'Serpine1', 'Gdf15', 'Ccn1', 'Ccn2', 'Wwtr1', 'Yap1', 'Tead4', 'Klf5', 'Nfib'],
    groupby='group',
    cmap='YlGnBu',
    standard_scale='var',
    save='aec_genes_trans.png',
)
WARNING: saving figure to file figures/dotplot_aec_genes_trans.png
# Ccl2 per 'group' in the intermediate subset and in `act_fb`; each plot is
# saved to its own file.
sc.pl.dotplot(
    intermed,
    var_names=['Ccl2'],
    groupby='group',
    cmap='YlGnBu',
    standard_scale='var',
    save='intermed_ccl2.png',
)
sc.pl.dotplot(
    act_fb,
    var_names=['Ccl2'],
    groupby='group',
    cmap='YlGnBu',
    standard_scale='var',
    save='act_fb_ccl2.png',
)
WARNING: saving figure to file figures/dotplot_intermed_ccl2.png
WARNING: saving figure to file figures/dotplot_act_fb_ccl2.png
# Switch to a larger font (14) for the marker overview; the other figure
# settings passed here are dpi=100, dpi_save=300, 4x4 figsize and PNG format.
sc.settings.set_figure_params(
    dpi=100,
    fontsize=14,
    dpi_save=300,
    figsize=(4,4),
    format='png',
)

# Marker genes plotted for every 'subcluster3' label.
markers = [
    'Col1a1', 'Cthrc1', 'Pdgfra', 'Plp1', 'Pi16', 'Wt1', 'Aplnr',
    'Mki67', 'Cspg4', 'Aspn', 'Acta2', 'Itgam', 'Cd14', 'Cd68',
    'Spp1', 'Pparg', 'Jchain', 'Cd86', 'Cpa3', 'Foxp3', 'Cd3e',
    'Nkg7', 'Foxj1', 'Rtkn2', 'Krt8', 'Cdkn1a', 'Abca3', 'Scgb1a1',
]

# Matrix plot over the full `adata`, one row per 'subcluster3' label, with a
# dendrogram and each gene scaled to [0, 1] across labels.
sc.pl.matrixplot(
    adata,
    var_names=markers,
    groupby='subcluster3',
    cmap='YlGnBu',
    standard_scale='var',
    dendrogram=True,
    save='markers.png',
)
WARNING: saving figure to file figures/matrixplot_markers.png